#!/usr/bin/python
# -*- coding: utf-8 -*-

import MySQLdb

# Destination file: one tag per line, followed by its document frequency on
# the next line, ordered by descending frequency.
OUTPUT_PATH = '/home/chaojiansong/videobook/notinsvn/baike_tag_df'


def count_tag_df(records):
    """Return the document frequency of every tag.

    ``records`` is an iterable of ``(video_id, tag)`` pairs and may contain
    the same pair several times.  Each video counts as one document, so a
    tag is counted at most once per video.  Rows whose tag is ``None``
    (SQL NULL) are skipped because they cannot be written out as text.

    Returns a dict mapping tag -> number of distinct videos carrying it.
    """
    tag_df = {}
    seen = set()  # (video, tag) pairs that have already been counted
    for video, tag in records:
        if tag is None:
            continue
        pair = (video, tag)
        if pair in seen:
            # Count each document (video) only once per tag.
            continue
        seen.add(pair)
        tag_df[tag] = tag_df.get(tag, 0) + 1
    return tag_df


def write_tag_df(tag_df, out):
    """Write ``tag_df`` to the file-like ``out``, most frequent tag first.

    Every entry takes two lines: the tag, then its count.  ``sorted`` is
    stable, so tags with equal counts keep the dict's iteration order.
    """
    ranked = sorted(tag_df.items(), key=lambda item: item[1], reverse=True)
    for tag, df in ranked:
        out.write(tag + '\n')
        out.write(str(df) + '\n')


def main():
    """Read (video, tag) rows from MySQL and dump tag frequencies to disk."""
    # NOTE(review): the database credentials are hard-coded in source;
    # consider loading them from configuration or the environment.
    conn = MySQLdb.Connection("172.16.7.58", "root", "researchsucks", "X", 3306)
    try:
        conn.set_character_set("utf8")
        cur = conn.cursor()
        try:
            cur.execute("select video_id, baike_tag from pop_history_baike_tags")
            tag_df = count_tag_df(cur.fetchall())
        finally:
            # Release the cursor even when the query or the counting fails.
            cur.close()
    finally:
        conn.close()

    # The file is opened only after the query succeeded, and the context
    # manager closes it even if a write raises.
    with open(OUTPUT_PATH, 'w') as out:
        write_tag_df(tag_df, out)


if __name__ == "__main__":
    main()
